# import libraries
import altair as alt
import pandas as pd
# Load the dataset into a Pandas DataFrame
data = pd.read_excel('SFTT.xlsx')
# Remove white spaces from column names first, so that every by-name column
# lookup below sees the cleaned headers (this used to run after the
# 'Point X' / 'Point Y' lookups)
data = data.rename(columns=lambda x: x.strip())

# Bounding box of all coordinates in the sheet; computed before any rows are
# filtered out below
xmin = data['Point X'].min()
xmax = data['Point X'].max()
ymin = data['Point Y'].min()
ymax = data['Point Y'].max()
ratio = (xmax - xmin) / (ymax - ymin)
height = 400
geoscalar = 1.2  # Amount to scale the size of the geographical distribution
width = height * ratio  # Multiply by ratio for equirectangular maps

# Keep only the rows whose 'Tree Present' value equals True
data = data[data['Tree Present'] == True]

# Normalise postal codes: cast to string, left-pad with zeros to 7 characters,
# then drop the last two characters
# (presumably the codes load as floats such as 2115.0 -> '02115' -- TODO confirm)
data = data.astype({'Postal Code': 'string'})
data['Postal Code'] = data['Postal Code'].apply(lambda x: '{0:0>7}'.format(x))
data['Postal Code'] = data['Postal Code'].str[:-2]
# Rows with no 'Species' value are discarded, but only if the sheet actually
# has that column
if 'Species' in data.columns:
    data = data[data['Species'].notna()]

# The colon in the original column name caused trouble downstream (the original
# note says it "messed with the data type"; presumably it clashes with Altair's
# `field:type` shorthand), so give the column a colon-free name
data = data.rename(columns={'Tree: Tree Condition': 'Tree Condition'})

# Clean our data: take out postal codes that are too far from the Mass Ave
# Corridor. '000<N' looks like what a missing postal code turns into after the
# zero-padding and truncation applied earlier in the file.
excluded_codes = ['000<N', '02136', '02129', '02445']
outside_corridor = data['Postal Code'].isin(excluded_codes)
data = data[~outside_corridor]

postal_codes = data['Postal Code'].unique()
# Postal codes offered as radio buttons; each label is the code followed by a
# single space
options = ['02115', '02120', '02119', '02118', '02121', '02125']
labels = [f'{option} ' for option in options]

# The trailing None option is the empty selection: picking 'All' shows everything
input_dropdown = alt.binding_radio(
    options=[*options, None],
    labels=[*labels, 'All'],
    name='Postal Codes: ',
)

# Point selection on the 'Postal Code' field, driven by the radio buttons
code_select = alt.selection_point(fields=['Postal Code'], bind=input_dropdown)
# Source: https://altair-viz.github.io/user_guide/marks/geoshape.html
# Closed rectangular ring around the data's bounding box; the first corner is
# repeated at the end to close the ring.
bounds_ring = [
    [xmax, ymax],
    [xmax, ymin],
    [xmin, ymin],
    [xmin, ymax],
    [xmax, ymax],
]
bounds_polygon = alt.Polygon(type='Polygon', coordinates=[bounds_ring])

# Feature wrapping that rectangle; used further down as the projection's `fit`
geobounds = alt.Feature(
    type='Feature',
    properties={},
    geometry=alt.Geometry(bounds_polygon),
)
# The following code is taken from the Altair Tutorial done in class on 5/23/23
# NOTE(review): `topo_feature` declares the source as TopoJSON, yet the URL ends
# in `.geojson` -- confirm the file's real format.
zip_code_url = 'https://raw.githubusercontent.com/ethangu8/DS4200/main/ZIP_Codes%20(1).geojson'
zip_codes = alt.topo_feature(zip_code_url, feature='features')

# Light-gray zip-code outlines, clipped to the view
zip_shapes = alt.Chart(zip_codes).mark_geoshape(
    fill='lightgray',
    stroke='white',
    clip=True,
)
# Fit the equirectangular projection to the bounding-box feature
zip_projected = zip_shapes.project(type='equirectangular', fit=geobounds)
# Scale both dimensions by the same factor so the aspect ratio is kept
zip_code_map = zip_projected.properties(
    height=height * geoscalar,
    width=width * geoscalar,
)
from collections import Counter

# The ten most frequent common names, as (name, count) pairs
a = Counter(data['Common Name'].tolist()).most_common(10)
names = [name for name, _count in a]
in_top_names = data['Common Name'].isin(names)
common_names = data[in_top_names]

# The nine most frequent postal codes, as (code, count) pairs
b = Counter(data['Postal Code'].tolist()).most_common(9)
codes = [code for code, _count in b]
in_top_codes = data['Postal Code'].isin(codes)
common_codes = data[in_top_codes]

# Rows that are both in a top postal code and of a top common name
data2 = data[in_top_codes & in_top_names]

# Narrow preview of the columns of interest (notebook display)
preview_columns = ['Point X', 'Point Y', 'Postal Code', 'Species', 'Genus', 'Common Name']
b1 = data2[preview_columns]
b1.head(5)
| | Point X | Point Y | Postal Code | Species | Genus | Common Name |
|---|---|---|---|---|---|---|
| 1 | -71.101275 | 42.339655 | 02115 | cordata | Tilia | Littleleaf linden |
| 2 | -71.091306 | 42.337096 | 02115 | acerifolia | Platanus | London planetree |
| 3 | -71.091495 | 42.337111 | 02115 | acerifolia | Platanus | London planetree |
| 4 | -71.091278 | 42.337240 | 02115 | acerifolia | Platanus | London planetree |
| 5 | -71.091397 | 42.337220 | 02115 | acerifolia | Platanus | London planetree |
import numpy as np
# Postal codes still present in the filtered subset, in order of first appearance
postal_codes = data2['Postal Code'].unique()

# Drop-down listing every remaining postal code; the leading None option is
# labelled 'All' and stands for the empty selection
dropdown = alt.binding_select(
    options=[None] + list(postal_codes),
    labels=['All'] + list(postal_codes),
)

# Point selection on 'Postal Code', bound to the drop-down.
# `alt.selection_single` is deprecated in favour of `alt.selection_point`
# (see the AltairDeprecationWarning it emits); the arguments are unchanged.
code_select = alt.selection_point(
    fields=["Postal Code"],
    bind=dropdown,
    name="Postal Codes",
)
C:\Users\ethan\anaconda3\lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'selection_single' is deprecated. Use 'selection_point' warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
# Two independent interval selections plus a point selection on postal code.
# `brush` is attached to the tree map below; `brush1` only filters it here.
brush = alt.selection_interval()
brush1 = alt.selection_interval()
selection = alt.selection_point(fields=['Postal Code'])

# One circle per tree, placed by its coordinates and colored by postal code.
# (Sizing the circles by 'Diameter' was tried and left disabled.)
tree_points = alt.Chart(data2).mark_circle(size=30).encode(
    longitude="Point X:Q",
    latitude="Point Y:Q",
    color="Postal Code",
    tooltip=["Genus", "Common Name", "Postal Code", "Species", "Street Address"],
)
tree_points = tree_points.add_params(brush, code_select, selection)
# Show only the chosen postal code, then only what `brush1` has selected
tree_points = tree_points.transform_filter(code_select).transform_filter(brush1)
trees = tree_points.properties(title='Trees Across Boston')
# Bar chart of the tree count per postal code, longest bar first
code_bars = alt.Chart(data2).mark_bar().encode(
    x=alt.X('count():Q', title='Tree Count'),
    y=alt.Y('Postal Code:N', title='Zip Code', sort='-x'),
    color='Postal Code:N',
    tooltip=['Postal Code', 'count()'],
)
code_bars = code_bars.add_params(selection)
# Restrict to the chosen postal code and to whatever `brush` has selected
code_bars = code_bars.transform_filter(code_select).transform_filter(brush)
trees_by_code = code_bars.properties(title='Number of Trees by Postal Code')
# Bar chart of the tree count per common name, longest bar first; coloring by
# postal code splits each bar by postal code
name_bars = alt.Chart(data2).mark_bar().encode(
    x=alt.X('count():Q', title='Tree Count'),
    y=alt.Y('Common Name:N', title='Common Name', sort='-x'),
    color='Postal Code',
    tooltip=['Postal Code', 'count()'],
)
# `brush1` is drawn on this chart
name_bars = name_bars.add_params(brush1)
# Restrict to the chosen postal code and to whatever `brush` has selected
name_bars = name_bars.transform_filter(code_select).transform_filter(brush)
common_trees = name_bars.properties(title='Most Common Trees')
# Notebook preview: the zip-code map layered with the tree points on the left,
# the two bar charts stacked on the right. The parentheses only spell out the
# grouping that operator precedence already gives (+ before & before |).
(zip_code_map + trees) | (trees_by_code & common_trees)
For our interactive visualization, we wanted it to be exploratory for a viewer or organization. Since the motivating question for our project focuses on the current status and location of trees along the Mass Ave corridor, we felt the best route was to make a map the main aspect of our interactive visualization. Using the dataset provided by Speak for the Trees, we were able to obtain the location of each tree as longitude/latitude (X/Y) coordinates, as well as the postal code it belongs to. With this, along with a GeoJSON file we found online, we were able to plot our coordinates on top of a map, which lets us use color and clustering to represent the different postal codes of Boston. As for our supplementary visualizations, we wanted to more clearly represent differences between postal codes and between different types of trees. Our first visual is a simple bar chart representing the number of trees by postal code, and it updates if a user wants to look at a certain area of Boston. Our second visual represents the most common trees across Boston, and is also color-coded to show the distribution of each type of tree across the different postal codes. We also added the functionality for a user to select any individual postal code through a dropdown, which will display just that postal code's data, and the Most Common Trees stacked bar chart is also linked, so we can see the individual distribution of one type of tree across Boston.
# Assemble the final layout: the layered map beside the stacked bar charts
map_panel = zip_code_map + trees
bar_panel = trees_by_code & common_trees
chart = alt.vconcat(map_panel | bar_panel)

# Write the chart out as an HTML file
chart.save('Final_Viz.html')